# Clear every object from the current workspace before running the script.
rm(list = ls())

library(data.table)
library(xtable)

# Experimental data: read the pooled file and keep every row whose `study`
# is not 'Pooled'.
data_pooled <- readRDS('./replication_hasz/output/data/data_pooled.rds')
data <- data_pooled[!(study == 'Pooled')]

# Respondents flagged as independents get a missing `strong_partisan`.
data[independent == 1, `:=`(strong_partisan = NA)]

# region
# State names grouped into four regions. The trailing #1..#4 are the numeric
# region codes; they line up with the `gereg` codes used for the CPS
# benchmark samples later in this script.
northeast <- c('Connecticut', 'Maine', 'Massachusetts',
               'New Hampshire', 'Rhode Island', 'Vermont',
               'New Jersey', 'New York', 'Pennsylvania') #1
midwest <- c('Indiana', 'Illinois', 'Michigan', 'Ohio', 'Wisconsin',
             'Iowa', 'Kansas', 'Minnesota', 'Missouri',
             'Nebraska', 'North Dakota', 'South Dakota') #2
south <- c('Delaware',
           'Washington, D.C.',
           'Florida',
           'Georgia',
           'Maryland',
           'North Carolina',
           'South Carolina',
           'Virginia',
           'West Virginia',
           'Alabama', 'Kentucky', 'Mississippi',
           'Tennessee', 'Arkansas', 'Louisiana', 'Oklahoma', 'Texas')  #3
west <- c('Arizona', 'Colorado', 'Idaho',
          'New Mexico',
          'Montana', 'Utah', 'Nevada', 'Wyoming',
          'Alaska', 'California', 'Hawaii', 'Oregon', 'Washington')#4

# Same membership lists, addressable by region name / region code (1..4).
region_states <- list(northeast = northeast, midwest = midwest,
                      south = south, west = west)

# Labels identifying the Florida study. Later in this script the study is
# matched as 'Florida Study'; 'study 1' is kept so the original condition
# still applies if that label ever occurs.
florida_study_labels <- c('study 1', 'Florida Study')

# 1/0 indicator column per region. The state lists are looked up through
# `region_states` rather than by their bare names: once a column called
# e.g. `midwest` exists, `state %in% midwest` inside data[...] refers to
# that 0/1 column instead of the vector of state names.
for (region_name in names(region_states)) {
  data[, (region_name) := as.numeric(state %in% region_states[[region_name]])]
}

# Florida-study respondents are all assigned to the south.
data[study %in% florida_study_labels,
     c('northeast', 'midwest', 'south', 'west') := .(0, 0, 1, 0)]

# Numeric region code (1 = northeast, 2 = midwest, 3 = south, 4 = west).
# Row positions are computed outside data[...] so the column/vector name
# clash described above cannot occur.
if (!('region' %in% names(data))) {
  data[, region := NA_real_]
}
for (region_idx in seq_along(region_states)) {
  region_rows <- which(data[['state']] %in% region_states[[region_idx]])
  data[region_rows, region := as.numeric(region_idx)]
}
# Florida-study respondents without a matched state default to the south.
data[is.na(region) & study %in% florida_study_labels, region := 3]

# Race indicators derived from the `race` string. `black` and `asian` are
# NA when `race` is NA; `race_other` is 1 for anything outside the three
# listed categories (including NA, because %in% never returns NA).
data[, `:=`(
  black = as.numeric(race == 'Black or African American'),
  asian = as.numeric(race == 'Asian'),
  race_other = as.numeric(
    !(race %in% c('White', 'Black or African American', 'Asian'))
  )
)]


# Characteristics of registered voters, CPS Nov 2020: national and Florida.
# The file is read and recoded once; the national and Florida benchmark
# tables are then two row subsets of the same recoded table. All recodes are
# row-wise, so this yields the same tables as recoding each subset separately.
cps_raw <- fread('./replication_hasz/data/nov20pub.csv')
# setnames() renames in place; `colnames<-` would copy the data.table.
setnames(cps_raw, tolower(names(cps_raw)))

# relevant vars:
# gereg: region code, 1 = northeast, 2 = midwest, 3 = south, 4 = west
cps_raw[, `:=`(
  northeast = as.numeric(gereg == 1),
  midwest   = as.numeric(gereg == 2),
  south     = as.numeric(gereg == 3),
  west      = as.numeric(gereg == 4)
)]

# For the variables below, non-positive codes are left as NA.

#pemaritl: codes 1-2 count as living with a partner
cps_raw[pemaritl > 0, w_partner := as.numeric(pemaritl < 3)]

#pesex: code 1 is male
cps_raw[pesex > 0, male := as.numeric(pesex == 1)]

#peeduca (check college): codes above 40 count as college
# NOTE(review): original author flagged this cutoff for checking; confirm
# against the CPS codebook.
cps_raw[peeduca > 0, college := as.numeric(peeduca > 40)]

#ptdtrace: 1 = white, 2 = black, 4 = asian, 3 and >4 = other
cps_raw[ptdtrace > 0, `:=`(
  white      = as.numeric(ptdtrace == 1),
  black      = as.numeric(ptdtrace == 2),
  asian      = as.numeric(ptdtrace == 4),
  race_other = as.numeric(ptdtrace == 3 | ptdtrace > 4)
)]

#pehspnon: code 1 is latino
cps_raw[pehspnon > 0, latino := as.numeric(pehspnon == 1)]

#penatvty: code 57 is native born
cps_raw[penatvty > 0, native_born := as.numeric(penatvty == 57)]

#pemlr: codes 1-2 count as working
cps_raw[pemlr > 0, work := as.numeric(pemlr < 3)]

#pes1, pes2: a 1 on either marks the respondent as a registered voter
cps_raw[(pes1 == 1 | pes2 == 1), registered_vote := 1]

keep <- c('work', 'native_born', 'latino',
          'white', 'black', 'asian', 'race_other',
          'college', 'male', 'w_partner', 'west', 'south', 'midwest', 'northeast')

# National benchmark: all registered voters.
cps <- cps_raw[registered_vote == 1, ..keep]

# Florida benchmark: registered voters with state FIPS code 12.
cps_florida <- cps_raw[registered_vote == 1 & gestfips == 12, ..keep]

# The full CPS table is no longer needed.
rm(cps_raw)

### ANES (partisanship, interest in local politics, attitudes towards migrants)
# The file is read and recoded once; the national and Florida benchmark
# tables are two row subsets of the same recoded table. All recodes are
# row-wise, so this yields the same tables as recoding each subset separately.
anes_raw <- fread('./replication_hasz/data/anes_timeseries_2020_csv_20220210.csv')

# Registered voters: V201008 equal to 1 or 2, or V201009 equal to 1.
anes_raw[((V201008 > 0 & V201008 < 3) | V201009 == 1), registered_vote := 1]

# Attitude towards migrants: V202232 (1..5) reverse-coded to 5..1; any other
# code stays NA.
anes_raw[V202232 %in% 1:5, att_mig := 6 - V202232]

# Party identification from V201231x (positive codes only):
# 1-2 democrat, 3-5 independent, 6-7 republican.
anes_raw[V201231x > 0, `:=`(
  democrat    = as.numeric(V201231x == 1 | V201231x == 2),
  independent = as.numeric(V201231x >= 3 & V201231x <= 5),
  republican  = as.numeric(V201231x == 6 | V201231x == 7)
)]

# Partisan strength from V201229: 1 -> strong, 2 -> not strong, else NA.
anes_raw[V201229 %in% 1:2, strong_partisan := as.numeric(V201229 == 1)]

keep <- c('att_mig', 'democrat', 'independent', 'republican', 'strong_partisan')

# National benchmark: all registered voters.
anes <- anes_raw[registered_vote == 1, ..keep]

#### ANES Florida
# Registered voters whose V203000 equals 12.
# NOTE(review): presumably a state FIPS code; confirm against the ANES codebook.
anes_florida <- anes_raw[registered_vote == 1 & V203000 == 12, ..keep]

# The full ANES table is no longer needed.
rm(anes_raw)



# Tag every table with the sample it comes from.
data[, `:=`(sample = 'exp')]
cps[, `:=`(sample = 'cps')]
cps_florida[, `:=`(sample = 'cps fl')]
anes[, `:=`(sample = 'anes')]
anes_florida[, `:=`(sample = 'anes fl')]

# Stack all samples; columns missing from a table are filled with NA.
data_all <- rbindlist(
  list(data, cps_florida, anes_florida, cps, anes),
  use.names = TRUE,
  fill = TRUE
)

# Relabel the experimental rows by the study they belong to.
data_all[study == 'Florida Study', `:=`(sample = 'study 1')]
data_all[study == 'U.S. Study', `:=`(sample = 'study 2')]

# Variables reported in the table, in display order.
vars <- c('male' , 'college' , 'work' , 'w_partner',
          'northeast', 'midwest', 'south', 'west',
          'white' , 'black', 'asian', 'race_other', 'latino' , 'native_born',
          'democrat' , 'republican', 'independent' , 'strong_partisan',
          'att_mig' , 'interest_politics')

# Mean and sd of every variable within each sample, one row per
# (sample, variable). The variable label is produced in the same call as the
# statistics, so the labelling does not depend on how many samples there are
# or on the order in which the groups come out.
summary_stats <- data_all[, .(var = vars,
                              mean = vapply(.SD, mean, numeric(1), na.rm = TRUE),
                              sd = vapply(.SD, sd, numeric(1), na.rm = TRUE)),
                          by = .(sample), .SDcols = vars]

# One row per variable, with columns mean_<sample> and sd_<sample>.
summary_stats <- dcast(summary_stats, var ~ sample, value.var = c('mean', 'sd'))
# dcast sorts rows by `var`; restore the display order.
summary_stats <- summary_stats[vars, on = "var"]

# Each variable comes from either CPS or ANES: take the CPS value and fall
# back to the ANES value wherever the CPS one is missing. Done separately for
# the national and the Florida benchmarks.
summary_stats[, `:=`(
  mean_cps_anes    = ifelse(is.na(mean_cps), mean_anes, mean_cps),
  sd_cps_anes      = ifelse(is.na(sd_cps), sd_anes, sd_cps),
  mean_cps_anes_fl = ifelse(is.na(`mean_cps fl`), `mean_anes fl`, `mean_cps fl`),
  sd_cps_anes_fl   = ifelse(is.na(`sd_cps fl`), `sd_anes fl`, `sd_cps fl`)
)]

# Move the reported columns to the front: study 1 next to the Florida
# benchmark, then study 2 next to the national benchmark.
leading_cols <- c(
  'var',
  'mean_study 1', 'sd_study 1',
  'mean_cps_anes_fl', 'sd_cps_anes_fl',
  'mean_study 2', 'sd_study 2',
  'mean_cps_anes', 'sd_cps_anes'
)
setcolorder(summary_stats, leading_cols)

# Columns written to the LaTeX table, in output order.
table_cols <- c('var', 'mean_study 1', 'sd_study 1',
                'mean_cps_anes_fl', 'sd_cps_anes_fl',
                'mean_study 2', 'sd_study 2',
                'mean_cps_anes', 'sd_cps_anes')

# `digits` and `align` carry one extra leading entry for the row-name column,
# which is then suppressed when printing.
summary_table <- xtable(summary_stats[, table_cols, with = FALSE],
                        caption = "Summary Statistics",
                        digits = c(0, 0, 3, 3, 3, 3, 3, 3, 3, 3),
                        align = 'lccccccccc')

# caption.placement is an option of print(), not of xtable(), so it is
# passed only here.
print(summary_table,
      file = './replication_hasz/output/tables/tabB1.tex',
      include.rownames = FALSE, caption.placement = "top")

